Descriptive analytics of the public 2019 coronavirus (COVID-19) data
EDA
ETL
kaggle
Author
Oscar Cardec
Published
July 2, 2023
Introduction
The following assessment focuses on …
Code
import pandas as pd
import numpy as np
import plotly.express as px

# Load the dataset
data = pd.read_csv('data/covid_19_data.csv')

# Data Transformation
# NOTE: the columns are renamed positionally, so the CSV must contain exactly
# these eight columns in this order; otherwise pandas raises a length mismatch.
data.columns = [
    'SerialNum',
    'ObservationDate',
    'Province/State',
    'Country/Region',
    'Last Update',
    'Confirmed',
    'Deaths',
    'Recovered',
]
data['ObservationDate'] = pd.to_datetime(data['ObservationDate'])

# Keep only numeric columns for aggregation
numeric_cols = ['Confirmed', 'Deaths', 'Recovered']

# Treat missing counts as zero. The fill is limited to the count columns so
# that text columns such as 'Province/State' keep their missing values rather
# than receiving a numeric 0 placeholder.
data[numeric_cols] = data[numeric_cols].fillna(0)

# Aggregate data by country and date (one row per country per observation date)
country_data = (
    data.groupby(['Country/Region', 'ObservationDate'])[numeric_cols]
    .sum()
    .reset_index()
)

# Time Series Visualization
fig = px.area(
    country_data,
    x='ObservationDate',
    y='Confirmed',
    color='Country/Region',
    title='Confirmed Cases Over Time',
)
fig.show()
Code
# Bar chart for top 10 countries by confirmed cases
# Restrict to the most recent observation date, then rank by confirmed count.
most_recent_date = country_data['ObservationDate'].max()
is_most_recent = country_data['ObservationDate'] == most_recent_date
latest_by_country = country_data[is_most_recent]
ranked_by_confirmed = latest_by_country.sort_values(by='Confirmed', ascending=False)
top_countries = ranked_by_confirmed.head(10)

fig = px.bar(
    top_countries,
    x='Country/Region',
    y='Confirmed',
    title='Top 10 Countries by Confirmed Cases',
)
fig.show()
Code
# Choropleth map of global spread
# Take the rows for the most recent observation date.
latest_data = data[data['ObservationDate'] == data['ObservationDate'].max()]

# The raw data has a 'Province/State' column, so a country can appear on
# several rows for the same date. Sum them to one row per country so that
# each map location is coloured by the country total rather than by a
# single province's figure.
latest_data = latest_data.groupby('Country/Region', as_index=False)[
    ['Confirmed', 'Deaths', 'Recovered']
].sum()

fig = px.choropleth(
    latest_data,
    locations="Country/Region",
    locationmode='country names',
    color="Confirmed",
    hover_name="Country/Region",
    color_continuous_scale='Reds',
    title='Global Spread of COVID-19',
)
fig.show()
Code
# Pie chart of Confirmed cases by top 10 countries
# Each slice is one country from `top_countries`, sized by its confirmed count.
pie_options = {
    'values': 'Confirmed',
    'names': 'Country/Region',
    'title': 'Proportion of Confirmed Cases by Country',
}
fig = px.pie(top_countries, **pie_options)
fig.show()
Code
# Scatter plot of Confirmed vs Deaths
# One point per (country, observation date) row of `country_data`,
# coloured by country.
scatter_options = {
    'x': 'Confirmed',
    'y': 'Deaths',
    'color': 'Country/Region',
    'title': 'Confirmed vs Deaths',
}
fig = px.scatter(country_data, **scatter_options)
fig.show()